# Purpose: Run hedonic regressions to estimate whether home prices are responsive to building code requirements
pacman::p_load(tidyverse, fixest, fst, DescTools, data.table, cowplot, tigris, modelsummary, scales, kableExtra)
theme_set(theme_minimal())

# Setup -----
rm(list = ls())

source("code/globals.R")

# Load assessment and transaction data for all CA homes ----
# From 47-add-characteristics-to-sales.R
sales <- read_fst(file.path(WORKING, "allCA-sales-clean.fst"), as.data.table = T)

# Only use homes within at least the 1km border
sales <- sales %>% filter(border_1000 == TRUE)

# Drop homes that were sold before building or during year of construction
# Eliminates pre-sales etc
sales <- sales[combinedyear <= sale_year]

# Only keep sales where we have all teh variables we'll need
sales <- sales %>%
  filter(complete.cases(combinedyear, regime, slope, elev, LotSizeAcres, sqfeet, TotalBedrooms, wildfire_hazard, sales_price_real))

# Only include sales after 1994 (sample very thin before that)
sales <- sales %>% filter(sale_year >= 1994)

# Sales price in 1000s
sales <- sales %>%
  mutate(sales_price_1000s = sales_price_real / 1000)

# Wildfire hazard in %
sales <- sales %>%
  mutate(wildfire_hazard_pct = wildfire_hazard * 100)

# Descriptive statistics for sampled homes ----

vardist <- datasummary(combinedyear + slope + LotSizeAcres + sqfeet1000 + TotalBedrooms + elev + wildfire_hazard_pct + sales_price_1000s ~ 
                                        Mean + SD + P5 + Median + P95,
                    data = sales,
                    fmt = fmt_significant(3),
                    output = "data.frame") 

# Use the data dictionary to rename variables
vardist <- vardist %>% 
  mutate(` ` = dict_names[as.character(` `)])

counts <- tribble(
  ~` `, ~Mean, # Match names from vardist etc
  "Homes", comma(n_distinct(sales$ImportParcelID)),
  "Counties", comma(n_distinct(sales$FIPS)),
  "Census blocks", comma(n_distinct(sales$block)))

sum_df <- bind_rows(vardist, counts)
sum_df[is.na(sum_df)] <- "" # Replace NA with empty string

kbl(sum_df, 
    format = "latex",
    escape = F, 
    booktabs = T,
    align = "lccccc",
    linesep = "") %>%
  group_rows(sprintf("Panel A. Home sales (N = %s)", comma(nrow(sales))), 
             start_row = 1, end_row = 8, bold = F, italic = T) %>%
  group_rows("Panel B. Counts", start_row = 9, end_row = 11, bold = F, italic = T) %>%
  write(file.path(RES, "hedonics-descriptive-statistics.tex"))


# Estimate hedonic regressions -----
fits_price <- list()

## Price on vintage period (year built)
# This tells us if homes built after the codes were sold for higher or lower amounts
# i.e., did living in these specific to-code homes become less appealing?

fmla_price <- as.formula("logprice ~ i(treatmentgroup, period, ref = \"No-codes\", ref2 = 0) + slope + i(fuelmodel) + LotSizeAcres + sqfeet1000 + TotalBedrooms + elev + wildfire_hazard | tract^period + street^treatmentgroup + sale_period")

# # Tract + Street FE (2km border)
# fits_price$vintage_allcontrols <- feols(fmla_price, data = sales)

# Limit to places inside 1km of an LRA/SRA border
fits_price$vintage_street_1000 <- feols(fmla_price, data = sales %>% filter(border_1000 == TRUE))

# Limit to places inside 500m of an LRA/SRA border
fits_price$vintage_street_500 <- feols(fmla_price, data = sales %>% filter(border_500 == TRUE))

# Limit to places inside 250 of an LRA/SRA border
fits_price$vintage_street_250 <- feols(fmla_price, data = sales %>% filter(border_250 == TRUE))


# Create table ----

fits_price_tex <- etable(fits_price,
  depvar = T,
  cluster = ~tract,
  order = c("SRA", "LRA"),
  group = list(
    "Fuel Model Controls" = "Fuel",
    "Additional Controls" = c("Square feet", "Bedrooms", "Lot size", "Elevation", "Burn Prob.", "Wildfire hazard")
  ),
  extralines = list(
    "-_Border size" = c("1 km", "500 m", "250 m")
  ),
  fitstat = c("n", "r2", "my"),
  digits = "s3",
  digits.stats = "s3",
  powerBelow = -10,
  interaction.combine = " $\\times$ ",
  tex = T
)

fits_price_tex

fits_price_tex %>% writeLines("results/price-regs.tex")

# Street FE are preferred because substreet have a median of 4 sales and an average of 5.3 sales within each substreet. Not much variation. Street have 15 and 24.2.


# Vitality effects ---
# Do already-built homes experience price changes after the codes come into place?

fits_vitality = list()

fmla_vitality_baseline <- as.formula("logprice ~ i(treatmentgroup, sale_period, ref = \"No-codes\", ref2 = 0) + slope + i(fuelmodel) + LotSizeAcres + sqfeet1000 + TotalBedrooms + elev + wildfire_hazard | tract^period + tract^sale_period + street^treatmentgroup")

fmla_vitality_parcelFE <- as.formula("logprice ~ i(treatmentgroup, sale_period, ref = \"No-codes\", ref2 = 0) | tract^period + tract^sale_period + ImportParcelID")

# # Street FE
# fits_vitality$baseline = feols(fmla_vitality_baseline, data = sales %>% filter(combinedyear <= 1990 & border_1000 == TRUE))

# Parcel FE - 1km
fits_vitality$parcelFE_1000 = feols(fmla_vitality_parcelFE, 
                                    data = sales %>% filter(n_sales > 1 & combinedyear <= 1990 & border_1000 == TRUE))

# Parcel FE - 500m
fits_vitality$parcelFE_500 = feols(fmla_vitality_parcelFE, 
                                   data = sales %>% filter(n_sales > 1 & combinedyear <= 1990 & border_500 == TRUE))

# Parcel FE - 250m
fits_vitality$parcelFE_250 = feols(fmla_vitality_parcelFE, 
                                   data = sales %>% filter(n_sales > 1 & combinedyear <= 1990 & border_250 == TRUE))


etable(fits_vitality, cluster = ~tract)

fits_vitality_tex = etable(fits_vitality, 
                           depvar=T,
                           order=c('^Sold 1998--2007','^Sold 2008--2016','SRA','LRA','slope','Fuel'),
                           dict = dict_names,
                           group = list(
                             "Fuel Model Controls" = "Fuel",
                             "Additional Controls" = c("Square feet", "Bedrooms", "Lot size", "Elevation", "Burn Prob.", "Wildfire hazard")
                           ),
                           extralines = list(
                             "-_Buffer size" = c("1 km", "500 m", "250 m")
                           ),
                           fitstat = c("n", "r2","my"),
                           digits = "s3",
                           digits.stats = 's3',
                           powerBelow=-10,
                           interaction.combine=" $\\times$ ",
                           tex = T)

fits_vitality_tex

fits_vitality_tex %>% writeLines("results/vitality-regs.tex")
